
from scrapy.selector import HtmlXPathSelector
from scrapy.spider import BaseSpider
from mycrawler.items import CrawlVersionlUrl


class Secondelevel(BaseSpider):
    """Spider that scrapes the nested link list on the jyeoo math search page.

    For every entry of the outer list (a "version") it walks the nested
    list beneath it (the "grades") and emits one ``CrawlVersionlUrl`` item
    per grade, carrying the combined label and the absolute link.
    """

    name = 'CrawlVersion'

    # Scrapy's offsite filtering reads ``allowed_domains`` and expects bare
    # host names, not URLs with a scheme or path.
    allowed_domains = ['www.jyeoo.com']

    # Original, misspelled attribute. Scrapy ignores this name; it is kept
    # only so that external code which may read it does not break.
    allowed_domin = ['http://www.jyeoo.com/math/ques']

    start_urls = [
        'http://www.jyeoo.com/math/ques/search',
    ]

    def parse(self, response):
        """Extract one item per (version, grade) pair found in the page.

        Args:
            response: The downloaded response for one of ``start_urls``.

        Returns:
            A list of ``CrawlVersionlUrl`` items with the fields ``version``
            (version label immediately followed by the grade label), ``url``
            (site root prepended to the grade link's ``href``) and ``kind``
            (always ``2``).

        Entries that lack the anchor text or ``href`` needed to build an
        item are skipped, so one malformed ``<li>`` does not abort the page.
        """
        versionurl = 'http://www.jyeoo.com'

        hxs = HtmlXPathSelector(response)
        sites = hxs.select('//*[@id="page"]/div/table/tr[1]/td[1]/ul/li/ul/li')

        items = []
        for site in sites:
            version_names = site.select('a/text()').extract()
            if not version_names:
                # No label for this version: nothing sensible to prefix.
                continue
            base_version = version_names[0]

            for grade in site.select('ul/li'):
                grade_names = grade.select('a/text()').extract()
                hrefs = grade.select('a/@href').extract()
                if not grade_names or not hrefs:
                    # Incomplete entry (no link text or no target); skip it.
                    continue

                item = CrawlVersionlUrl()
                item['version'] = '%s%s' % (base_version, grade_names[0])
                # The hrefs are treated as site-root-relative paths.
                item['url'] = '%s%s' % (versionurl, hrefs[0])
                # NOTE(review): meaning of kind == 2 is defined by whatever
                # consumes the item; it is not visible in this file.
                item['kind'] = 2
                items.append(item)

        return items
